Tensorlistfromtensor

将输入张量按照第一个维度拆分成多个张量。假设输入张量的形状为 [N1, N2, N3, …],该算子将输入张量拆分成 N1 个张量,每个输出张量的形状为 [N2, N3, …]。

\[\text{output\_tensors}[i] = \text{input\_tensor}[\text{slice\_at\_dim0} = i]\]

其中 \(i = 0, 1, \ldots, N1 - 1\),每个输出张量包含 N1 个切片中的一个。

输入:
  • input_tensor_values - 输入张量的数据指针,所指向的缓冲区包含 input_tensor_total_elements 个元素。

  • input_tensor_shape - 输入张量的形状数组(int* 类型),input_tensor_shape[0] 表示第一个维度的大小(即输出张量的数量)。

  • input_tensor_total_elements - 输入张量的总元素数(int 类型)。

  • core_mask - 核掩码(int),仅共享存储版本需要。

输出:
  • output_tensors - 输出张量数组(指针数组),大小为 input_tensor_shape[0]。每个元素 output_tensors[i] 指向第 i 个输出张量的数据。每个输出张量的大小为 input_tensor_total_elements / input_tensor_shape[0] 个元素。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 fp32, int8, int16, int32, fp64, cplx64, cplx128

  • MT7004 支持 fp16, fp32, int16, int32, cplx64

  • 算子会复制数据,输出张量与输入张量数据独立

  • 调用前需要确保所有 output_tensors[i] 指向的内存空间足够大(至少 input_tensor_total_elements / input_tensor_shape[0] 个元素)

  • 输出张量的数量等于 input_tensor_shape[0]

  • 每个输出张量的元素数为 input_tensor_total_elements / input_tensor_shape[0]

共享存储版本:

void i8_tensorlistfromtensor_s(int8_t *input_tensor_values, int *input_tensor_shape, int8_t **output_tensors, int input_tensor_total_elements, int core_mask)
void i16_tensorlistfromtensor_s(int16_t *input_tensor_values, int *input_tensor_shape, int16_t **output_tensors, int input_tensor_total_elements, int core_mask)
void i32_tensorlistfromtensor_s(int32_t *input_tensor_values, int *input_tensor_shape, int32_t **output_tensors, int input_tensor_total_elements, int core_mask)
void hp_tensorlistfromtensor_s(half *input_tensor_values, int *input_tensor_shape, half **output_tensors, int input_tensor_total_elements, int core_mask)
void fp_tensorlistfromtensor_s(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements, int core_mask)
void dp_tensorlistfromtensor_s(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements, int core_mask)
void c64_tensorlistfromtensor_s(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements, int core_mask)
void c128_tensorlistfromtensor_s(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <tensorlistfromtensor.h>
 4
 5int main(int argc, char* argv[]) {
 6    // 假设在DDR空间
 7    // 输入张量形状 [3, 4, 5],总元素数 = 3 * 4 * 5 = 60
 8    // 输出3个张量,每个形状 [4, 5],元素数 = 60 / 3 = 20
 9
10    int input_tensor_shape[] = {3, 4, 5};
11    int input_tensor_total_elements = 3 * 4 * 5;  // 60
12
13    // 输入张量数据
14    float *input_tensor_values = (float *)0xA0000000;
15    // input_tensor_values 包含 60 个 float 元素
16
17    // 输出张量数组(需要预先分配内存)
18    float *output0 = (float *)0xB0000000;  // 第0个输出张量,20个元素
19    float *output1 = (float *)0xB0100000;  // 第1个输出张量,20个元素
20    float *output2 = (float *)0xB0200000;  // 第2个输出张量,20个元素
21
22    float* output_tensors[3] = {output0, output1, output2};
23
24    int core_mask = 0xff;
25
26    fp_tensorlistfromtensor_s(input_tensor_values, input_tensor_shape,
27                              output_tensors, input_tensor_total_elements, core_mask);
28
29    return 0;
30}

私有存储版本:

void i8_tensorlistfromtensor_p(int8_t *input_tensor_values, int *input_tensor_shape, int8_t **output_tensors, int input_tensor_total_elements)
void i16_tensorlistfromtensor_p(int16_t *input_tensor_values, int *input_tensor_shape, int16_t **output_tensors, int input_tensor_total_elements)
void i32_tensorlistfromtensor_p(int32_t *input_tensor_values, int *input_tensor_shape, int32_t **output_tensors, int input_tensor_total_elements)
void hp_tensorlistfromtensor_p(half *input_tensor_values, int *input_tensor_shape, half **output_tensors, int input_tensor_total_elements)
void fp_tensorlistfromtensor_p(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements)
void dp_tensorlistfromtensor_p(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements)
void c64_tensorlistfromtensor_p(float *input_tensor_values, int *input_tensor_shape, float **output_tensors, int input_tensor_total_elements)
void c128_tensorlistfromtensor_p(double *input_tensor_values, int *input_tensor_shape, double **output_tensors, int input_tensor_total_elements)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <tensorlistfromtensor.h>
 4
 5int main(int argc, char* argv[]) {
 6    // 假设在L2空间
 7    int input_tensor_shape[] = {3, 4, 5};
 8    int input_tensor_total_elements = 3 * 4 * 5;
 9
10    float *input_tensor_values = (float *)0x10000000;
11
12    float *output0 = (float *)0x10010000;
13    float *output1 = (float *)0x10011000;
14    float *output2 = (float *)0x10012000;
15
16    float* output_tensors[3] = {output0, output1, output2};
17
18    fp_tensorlistfromtensor_p(input_tensor_values, input_tensor_shape,
19                              output_tensors, input_tensor_total_elements);
20
21    return 0;
22}